rm(list=ls())

library(foreign)
data=read.dta('directory')

data$reached[data$family==3581][2]=0

## testable condition
Z.o=3-data$treatment
D.o=data$hsecontact
Y.o=data$voted02p
############
#########   placebo effect
N0=length(Y.o[Z.o==0&data$city=='DENVER'])
p0=mean(Y.o[Z.o==0&data$city=='DENVER'])
sqrt(p0*(1-p0)/N0)
N1=length(Y.o[Z.o==1&data$city=='DENVER'])
p1=mean(Y.o[Z.o==1&data$city=='DENVER'])
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)



N0=length(Y.o[Z.o==0&data$city=='MINNEAPOLIS'])
p0=mean(Y.o[Z.o==0&data$city=='MINNEAPOLIS'])
sqrt(p0*(1-p0)/N0)
N1=length(Y.o[Z.o==1&data$city=='MINNEAPOLIS'])
p1=mean(Y.o[Z.o==1&data$city=='MINNEAPOLIS'])
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)


N0=length(Y.o[Z.o==0])
p0=mean(Y.o[Z.o==0])
sqrt(p0*(1-p0)/N0)
N1=length(Y.o[Z.o==1])
p1=mean(Y.o[Z.o==1])
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)


#############   by household  
N=3861


Z=numeric(N)
D=numeric(N)
sex1=numeric(N)
sex2=numeric(N)
age1=numeric(N)
age2=numeric(N)
his1=numeric(N)
his2=numeric(N)
city=numeric(N)
Y1=numeric(N)
Y2=numeric(N)
party1=numeric(N)
party2=numeric(N)
for (i in 1:N){
	Z[i]=3-mean(data$treatment[data$family==i])
	D[i]=mean(data$hsecontact[data$family==i])
	city[i]=2-mean(data$city[data$family==i]=='DENVER')
	if (D[i]==1){
		Y1[i]=data$voted02p[data$family==i & data$reached==1]
		Y2[i]=data$voted02p[data$family==i & data$reached==0]
		age1[i]=data$age[data$family==i & data$reached==1]
		age2[i]=data$age[data$family==i & data$reached==0]
		sex1[i]=data$gender[data$family==i & data$reached==1]
		sex2[i]=data$gender[data$family==i & data$reached==0]
		his1[i]=data$voted00p[data$family==i & data$reached==1]
		his2[i]=data$voted00p[data$family==i & data$reached==0]
		party1[i]=as.numeric(data$party[data$family==i & data$reached==1]=='DEMOCRATIC')
		party2[i]=as.numeric(data$party[data$family==i & data$reached==0]=='DEMOCRATIC')
	}else{
		Y1[i]=NA
		Y2[i]=NA
		age1[i]=NA
		age2[i]=NA
		sex1[i]=NA
		sex2[i]=NA
		his1[i]=NA
		his2[i]=NA
		party1[i]=NA
		party2[i]=NA
		}
}
######## p1 is the turnout rate under GOTV and p0 is the turnout rate under Recycling
####  denver contacted
N1=length(Y1[Z==2&D==1&city==1])
p1=mean(Y1[Z==2&D==1&city==1])
sqrt(p1*(1-p1)/N1)
N0=length(Y1[Z==1&D==1&city==1])
p0=mean(Y1[Z==1&D==1&city==1])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)

## non-contact
N1=length(Y2[Z==2&D==1&city==1])
p1=mean(Y2[Z==2&D==1&city==1])
sqrt(p1*(1-p1)/N1)
N0=length(Y2[Z==1&D==1&city==1])
p0=mean(Y2[Z==1&D==1&city==1])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)

####  minneapolis contacted
N1=length(Y1[Z==2&D==1&city==2])
p1=mean(Y1[Z==2&D==1&city==2])
sqrt(p1*(1-p1)/N1)
N0=length(Y1[Z==1&D==1&city==2])
p0=mean(Y1[Z==1&D==1&city==2])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)

## non-contact
N1=length(Y2[Z==2&D==1&city==2])
p1=mean(Y2[Z==2&D==1&city==2])
sqrt(p1*(1-p1)/N1)
N0=length(Y2[Z==1&D==1&city==2])
p0=mean(Y2[Z==1&D==1&city==2])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)

#### pooled contacted
N1=length(Y1[Z==2&D==1])
p1=mean(Y1[Z==2&D==1])
sqrt(p1*(1-p1)/N1)
N0=length(Y1[Z==1&D==1])
p0=mean(Y1[Z==1&D==1])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)

## non-contact
N1=length(Y2[Z==2&D==1])
p1=mean(Y2[Z==2&D==1])
sqrt(p1*(1-p1)/N1)
N0=length(Y2[Z==1&D==1])
p0=mean(Y2[Z==1&D==1])
sqrt(p0*(1-p0)/N0)
p1-p0
sqrt(p0*(1-p0)/N0+p1*(1-p1)/N1)











